import json
import time
import sys

from config import getConfigParams
from formatContent import formatContent, removeSrcParams
from requestUrl import getDocByUrl,getEntryIdByUrl
from logs import saveLog

# Script body: read the run configuration from the command line, fetch the
# article page, pull out its fields, and emit the result as JSON (to the log
# and to stdout).

# Resolve the configuration parameters from the command-line arguments.
configParams = getConfigParams(sys.argv)
url, env = configParams['real_url'], configParams['env']

# Look up the article ID from the base URL.
entryId = getEntryIdByUrl(configParams['base_url'])

# Fetch the HTML document; per the original note this yields a pq object
# that is queried below with CSS selectors.
doc = getDocByUrl(url)

# Article title.
title = doc('.title-article').text()

# Article body, passed through removeSrcParams.
articleContent = removeSrcParams(doc('#content_views'))

# Article author.
authorName = doc('.follow-nickName').text()

# Publication time as displayed on the page.
# NOTE(review): this value is scraped but never emitted -- 'public_time' in
# the result below is hard-coded to 0. Confirm whether that is intentional.
publicTime = doc('.article-bar-top span.time').text()

# Serialize the body once; the same markup is used both as the raw
# (unformatted) content and as the input to the tag filter.
rawHtml = articleContent.html()

# Filter the tags via formatContent (the original note attributes this step
# to Li Shiquan).
content = formatContent(rawHtml, env=env)

# Keyword order is preserved, so the emitted JSON keeps the same key order.
result = dict(
    entry_id=entryId,
    not_format_content=rawHtml,
    content=content,
    author_name=authorName,
    cover_url='',
    public_time=0,
    description='',
    title=title,
)

# Record the invocation arguments together with the result, then print the
# result alone on stdout.
logData = dict(params=sys.argv, result=result)
saveLog(json.dumps(logData, ensure_ascii=False))
print(json.dumps(result, ensure_ascii=False))
